load libraries

In [1]:
import os
import cv2
import glob
import numpy as np
from keras.models import *
from keras.layers import *
from keras.applications import *
from keras.preprocessing.image import *
Using TensorFlow backend.

加载数据集

In [2]:
basedir = "/ext/Data/distracted_driver_detection/"

model_image_size = 224

print("-------- loading train data")
X_train = list()
y_train = list()
for i in range(10):
    dir = os.path.join(basedir, "train", "c%d"%i)
    image_files = glob.glob(os.path.join(dir,"*.jpg"))
    print("loding {}, image count={}".format(dir, len(image_files)))
    for image_file in image_files:
        image = cv2.imread(image_file)
        X_train.append(cv2.resize(image, (model_image_size, model_image_size)))
        label = np.zeros(10, dtype=np.uint8)
        label[i]=1
        y_train.append(label)
X_train = np.array(X_train)
y_train = np.array(y_train)
        
print("-------- loading valid data")
X_valid = list()
y_valid = list()
for i in range(10):
    dir = os.path.join(basedir, "valid", "c%d"%i)
    image_files = glob.glob(os.path.join(dir,"*.jpg"))
    print("loding {}, image count={}".format(dir, len(image_files)))
    for image_file in image_files:
        image = cv2.imread(image_file)
        X_valid.append(cv2.resize(image, (model_image_size, model_image_size)))
        label = np.zeros(10, dtype=np.uint8)
        label[i]=1
        y_valid.append(label)
X_valid = np.array(X_valid)
y_valid = np.array(y_valid)
-------- loading train data
loding /ext/Data/distracted_driver_detection/train/c0, image count=2308
loding /ext/Data/distracted_driver_detection/train/c1, image count=2096
loding /ext/Data/distracted_driver_detection/train/c2, image count=2136
loding /ext/Data/distracted_driver_detection/train/c3, image count=2185
loding /ext/Data/distracted_driver_detection/train/c4, image count=2160
loding /ext/Data/distracted_driver_detection/train/c5, image count=2152
loding /ext/Data/distracted_driver_detection/train/c6, image count=2164
loding /ext/Data/distracted_driver_detection/train/c7, image count=1843
loding /ext/Data/distracted_driver_detection/train/c8, image count=1771
loding /ext/Data/distracted_driver_detection/train/c9, image count=1972
-------- loading valid data
loding /ext/Data/distracted_driver_detection/valid/c0, image count=181
loding /ext/Data/distracted_driver_detection/valid/c1, image count=171
loding /ext/Data/distracted_driver_detection/valid/c2, image count=181
loding /ext/Data/distracted_driver_detection/valid/c3, image count=161
loding /ext/Data/distracted_driver_detection/valid/c4, image count=166
loding /ext/Data/distracted_driver_detection/valid/c5, image count=160
loding /ext/Data/distracted_driver_detection/valid/c6, image count=161
loding /ext/Data/distracted_driver_detection/valid/c7, image count=159
loding /ext/Data/distracted_driver_detection/valid/c8, image count=140
loding /ext/Data/distracted_driver_detection/valid/c9, image count=157

分为训练集和验证集

In [4]:
print(X_train.shape)
print(y_train.shape)
print(X_valid.shape)
print(y_valid.shape)
(20787, 224, 224, 3)
(20787, 10)
(1637, 224, 224, 3)
(1637, 10)
In [5]:
base_model = VGG16(input_tensor=Input((model_image_size, model_image_size, 3)), weights='imagenet', include_top=False)

for layers in base_model.layers:
    layers.trainable = False

x = GlobalAveragePooling2D()(base_model.output)
x = Dropout(0.25)(x)
x = Dense(10, activation='softmax')(x)
model = Model(base_model.input, x)
model.compile(optimizer='adadelta', loss='binary_crossentropy', metrics=['accuracy'])

#     model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
print("done")
done

训练模型

In [6]:
model.fit(X_train, y_train, batch_size=16, epochs=10, validation_data=(X_valid, y_valid))
model.save("models/vgg16-mymodel.h5")
Train on 20787 samples, validate on 1637 samples
Epoch 1/10
20787/20787 [==============================] - 90s - loss: 0.7259 - acc: 0.8577 - val_loss: 0.6064 - val_acc: 0.8640 - ETA: 166s - loss: 1.2703 - acc: 0.8257 - ETA: 55s - loss: 0.9788 - acc: 0.8379 - ETA: 49s - loss: 0.9452 - acc: 0.8398 - ETA: 29s - loss: 0.8438 - acc: 0.8467 - ETA: 20s - loss: 0.8042 - acc: 0.8506
Epoch 2/10
20787/20787 [==============================] - 86s - loss: 0.3030 - acc: 0.9122 - val_loss: 0.5393 - val_acc: 0.8724 - ETA: 65s - loss: 0.4016 - acc: 0.8932 - ETA: 53s - loss: 0.3787 - acc: 0.8969 - ETA: 9s - loss: 0.3138 - acc: 0.9104 - ETA: 7s - loss: 0.3109 - acc: 0.9110
Epoch 3/10
20787/20787 [==============================] - 86s - loss: 0.1797 - acc: 0.9402 - val_loss: 0.4224 - val_acc: 0.8872 - ETA: 40s - loss: 0.1965 - acc: 0.9358 - ETA: 38s - loss: 0.1955 - acc: 0.9361 - ETA: 30s - loss: 0.1916 - acc: 0.9372 - ETA: 18s - loss: 0.1865 - acc: 0.9383
Epoch 4/10
20787/20787 [==============================] - 85s - loss: 0.1288 - acc: 0.9544 - val_loss: 0.4054 - val_acc: 0.8893 - ETA: 74s - loss: 0.1375 - acc: 0.9491 - ETA: 54s - loss: 0.1391 - acc: 0.9518 - ETA: 49s - loss: 0.1369 - acc: 0.9526
Epoch 5/10
20787/20787 [==============================] - 84s - loss: 0.1061 - acc: 0.9614 - val_loss: 0.4279 - val_acc: 0.8889 - ETA: 84s - loss: 0.1116 - acc: 0.9563
Epoch 6/10
20787/20787 [==============================] - 83s - loss: 0.0945 - acc: 0.9654 - val_loss: 0.4032 - val_acc: 0.8870
Epoch 7/10
20787/20787 [==============================] - 84s - loss: 0.0870 - acc: 0.9680 - val_loss: 0.3909 - val_acc: 0.8948
Epoch 8/10
20787/20787 [==============================] - 84s - loss: 0.0832 - acc: 0.9695 - val_loss: 0.3700 - val_acc: 0.8940
Epoch 9/10
20787/20787 [==============================] - 84s - loss: 0.0780 - acc: 0.9715 - val_loss: 0.4003 - val_acc: 0.8933
Epoch 10/10
20787/20787 [==============================] - 84s - loss: 0.0751 - acc: 0.9726 - val_loss: 0.3621 - val_acc: 0.8925
In [7]:
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
from keras.models import *

model = load_model("models/vgg16-mymodel.h5")
print("load successed")

SVG(model_to_dot(model).create(prog='dot', format='svg'))
load successed
Out[7]:
G 139793206658608 input_1: InputLayer 139793206658720 block1_conv1: Conv2D 139793206658608->139793206658720 139793206660736 block1_conv2: Conv2D 139793206658720->139793206660736 139793206659336 block1_pool: MaxPooling2D 139793206660736->139793206659336 139793206659728 block2_conv1: Conv2D 139793206659336->139793206659728 139793206659840 block2_conv2: Conv2D 139793206659728->139793206659840 139793206747488 block2_pool: MaxPooling2D 139793206659840->139793206747488 139793206747712 block3_conv1: Conv2D 139793206747488->139793206747712 139793206748048 block3_conv2: Conv2D 139793206747712->139793206748048 139793206748440 block3_conv3: Conv2D 139793206748048->139793206748440 139793206748832 block3_pool: MaxPooling2D 139793206748440->139793206748832 139793206749056 block4_conv1: Conv2D 139793206748832->139793206749056 139793206749392 block4_conv2: Conv2D 139793206749056->139793206749392 139793206749784 block4_conv3: Conv2D 139793206749392->139793206749784 139793206750176 block4_pool: MaxPooling2D 139793206749784->139793206750176 139793206750400 block5_conv1: Conv2D 139793206750176->139793206750400 139793206750736 block5_conv2: Conv2D 139793206750400->139793206750736 139793206660960 block5_conv3: Conv2D 139793206750736->139793206660960 139793206423904 block5_pool: MaxPooling2D 139793206660960->139793206423904 139793206424128 global_average_pooling2d_1: GlobalAveragePooling2D 139793206423904->139793206424128 139793206424240 dropout_1: Dropout 139793206424128->139793206424240 139793206424296 dense_1: Dense 139793206424240->139793206424296

CAM 可视化

http://cnnlocalization.csail.mit.edu/

$cam = (P-0.5)*w*output$

  • cam: 类激活图 7*7
  • P: 概率
  • output: 卷积层的输出 512*1
  • w: 卷积核的权重 x*x*2048
In [8]:
z = zip([x.name for x in model.layers], range(len(model.layers)))
for k, v in z:
    print("{} - {}".format(k,v))
input_1 - 0
block1_conv1 - 1
block1_conv2 - 2
block1_pool - 3
block2_conv1 - 4
block2_conv2 - 5
block2_pool - 6
block3_conv1 - 7
block3_conv2 - 8
block3_conv3 - 9
block3_pool - 10
block4_conv1 - 11
block4_conv2 - 12
block4_conv3 - 13
block4_pool - 14
block5_conv1 - 15
block5_conv2 - 16
block5_conv3 - 17
block5_pool - 18
global_average_pooling2d_1 - 19
dropout_1 - 20
dense_1 - 21
In [9]:
import matplotlib.pyplot as plt
import random
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

def show_heatmap_image(model_show, weights_show):
    test_dir = os.path.join(basedir,  "test", "test" )
    image_files = glob.glob(os.path.join(test_dir,"*"))
    print(len(image_files))
    
    plt.figure(figsize=(12, 14))
    for i in range(16):
        plt.subplot(4, 4, i+1)
        img = cv2.imread(image_files[2000*i+113])
        img = cv2.resize(img, (model_image_size, model_image_size))
        x = img.copy()
        x.astype(np.float32)
        out, predictions = model_show.predict(np.expand_dims(x, axis=0))
        predictions = predictions[0]
        out = out[0]
        
        max_idx = np.argmax(predictions)
        prediction = predictions[max_idx]

        status = ["safe driving",  " texting - right",  "phone - right",  "texting - left",  "phone - left",  
                  "operation radio", "drinking", "reaching behind", "hair and makeup", "talking"]

        plt.title('c%d |%s| %.2f%%' % (max_idx , status[max_idx], prediction*100))
    
        cam = (prediction - 0.5) * np.matmul(out, weights_show)
        cam = cam[:,:,max_idx]
        cam -= cam.min()
        cam /= cam.max()
        cam -= 0.2
        cam /= 0.8

        cam = cv2.resize(cam, (model_image_size, model_image_size))
        heatmap = cv2.applyColorMap(np.uint8(255*cam), cv2.COLORMAP_JET)
        heatmap[np.where(cam <= 0.2)] = 0

        out = cv2.addWeighted(img, 0.8, heatmap, 0.4, 0)

        plt.axis('off')
        plt.imshow(out[:,:,::-1])
print("done")
done
In [10]:
weights = model.layers[21].get_weights()[0]
layer_output = model.layers[18].output
model2 = Model(model.input, [layer_output, model.output])
print("layer_output {0}".format(layer_output))
print("weights shape {0}".format(weights.shape))
show_heatmap_image(model2, weights)
layer_output Tensor("block5_pool_1/MaxPool:0", shape=(?, 7, 7, 512), dtype=float32)
weights shape (512, 10)
79726
In [11]:
weights = model.layers[21].get_weights()[0]
layer_output = model.layers[16].output
model2 = Model(model.input, [layer_output, model.output])
print("layer_output {0}".format(layer_output))
print("weights shape {0}".format(weights.shape))
show_heatmap_image(model2, weights)
layer_output Tensor("block5_conv2_1/Relu:0", shape=(?, 14, 14, 512), dtype=float32)
weights shape (512, 10)
79726
In [12]:
weights = model.layers[21].get_weights()[0]
layer_output = model.layers[14].output
model2 = Model(model.input, [layer_output, model.output])
print("layer_output {0}".format(layer_output))
print("weights shape {0}".format(weights.shape))
show_heatmap_image(model2, weights)
layer_output Tensor("block4_pool_1/MaxPool:0", shape=(?, 14, 14, 512), dtype=float32)
weights shape (512, 10)
79726
In [13]:
weights = model.layers[21].get_weights()[0]
layer_output = model.layers[11].output
model2 = Model(model.input, [layer_output, model.output])
print("layer_output {0}".format(layer_output))
print("weights shape {0}".format(weights.shape))
show_heatmap_image(model2, weights)
layer_output Tensor("block4_conv1_1/Relu:0", shape=(?, 28, 28, 512), dtype=float32)
weights shape (512, 10)
79726
In [ ]: